from pyspark import SparkConf, SparkContext
import os
# Tell PySpark which Python interpreter to launch for its workers.
# NOTE(review): this is a machine-specific absolute path; it must be adjusted
# on any host where Python is installed elsewhere.
os.environ['PYSPARK_PYTHON'] = r"C:\Users\admin\AppData\Local\Programs\Python\Python38\python.exe"

# Run Spark locally ("local[*]") under the application name "test_spark".
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)
try:
    # Prepare an RDD of (gender, score) pairs.
    rdd = sc.parallelize([('男', 99), ('男', 88), ('女', 99), ('女', 66)])

    # Compute the total score for each of the two groups (male / female).
    # reduceByKey aggregates the values that share a key using the given
    # function, producing a single value per key; here the scores are summed,
    # giving 187 for '男' and 165 for '女'.
    rdd2 = rdd.reduceByKey(lambda a, b: a + b)
    print(rdd2.collect())
finally:
    # Always shut the context down, even when the job above fails, so the
    # resources held by the SparkContext are released.
    sc.stop()